<!DOCTYPE HTML>
<html lang="zh-CN">


<head>
    <meta charset="utf-8">
    <meta name="keywords" content="数据预处理, 博客">
    <meta name="description" content="个人学习储备知识与经验的秘密基地">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=no">
    <meta name="renderer" content="webkit|ie-stand|ie-comp">
    <meta name="mobile-web-app-capable" content="yes">
    <meta name="format-detection" content="telephone=no">
    <meta name="apple-mobile-web-app-capable" content="yes">
    <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
    <!-- Global site tag (gtag.js) - Google Analytics -->

<script async src="https://www.googletagmanager.com/gtag/js?id=G-KDXT8Y4CLW"></script>
<script>
    window.dataLayer = window.dataLayer || [];
    function gtag() {
        dataLayer.push(arguments);
    }

    gtag('js', new Date());
    gtag('config', 'G-KDXT8Y4CLW');
</script>


    <title>数据预处理 | 不二博客</title>
    <link rel="icon" type="image/png" href="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/favicon.png">

    <link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/awesome/css/all.css">
    <link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/materialize/materialize.min.css">
    <link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/aos/aos.css">
    <link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/animate/animate.min.css">
    <link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/lightGallery/css/lightgallery.min.css">
    <link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/css/matery.css">
    <link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/css/my.css">

    <script src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/jquery/jquery.min.js"></script>

<meta name="generator" content="Hexo 6.3.0">
<style>.github-emoji { position: relative; display: inline-block; width: 1.2em; min-height: 1.2em; overflow: hidden; vertical-align: top; color: transparent; }  .github-emoji > span { position: relative; z-index: 10; }  .github-emoji img, .github-emoji .fancybox { margin: 0 !important; padding: 0 !important; border: none !important; outline: none !important; text-decoration: none !important; user-select: none !important; cursor: auto !important; }  .github-emoji img { height: 1.2em !important; width: 1.2em !important; position: absolute !important; left: 50% !important; top: 50% !important; transform: translate(-50%, -50%) !important; user-select: none !important; cursor: auto !important; } .github-emoji-fallback { color: inherit; } .github-emoji-fallback img { opacity: 0 !important; }</style>
<link rel="alternate" href="/atom.xml" title="不二博客" type="application/atom+xml">
</head>




<body>
    <header class="navbar-fixed">
    <nav id="headNav" class="bg-color nav-transparent">
        <div id="navContainer" class="nav-wrapper container">
            <div class="brand-logo">
                <a href="/" class="waves-effect waves-light">
                    
                    <img src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/medias/logo.png" class="logo-img" alt="LOGO">
                    
                    <span class="logo-span">不二博客</span>
                </a>
            </div>
            

<a href="#" data-target="mobile-nav" class="sidenav-trigger button-collapse"><i class="fas fa-bars"></i></a>
<ul class="right nav-menu">
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/" class="waves-effect waves-light">
      
      <i class="fas fa-home" style="zoom: 0.6;"></i>
      
      <span>首页</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/tags" class="waves-effect waves-light">
      
      <i class="fas fa-tags" style="zoom: 0.6;"></i>
      
      <span>标签</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/categories" class="waves-effect waves-light">
      
      <i class="fas fa-bookmark" style="zoom: 0.6;"></i>
      
      <span>分类</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/archives" class="waves-effect waves-light">
      
      <i class="fas fa-archive" style="zoom: 0.6;"></i>
      
      <span>归档</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/about" class="waves-effect waves-light">
      
      <i class="fas fa-user-circle" style="zoom: 0.6;"></i>
      
      <span>关于</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/contact" class="waves-effect waves-light">
      
      <i class="fas fa-comments" style="zoom: 0.6;"></i>
      
      <span>留言板</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/friends" class="waves-effect waves-light">
      
      <i class="fas fa-address-book" style="zoom: 0.6;"></i>
      
      <span>友情链接</span>
    </a>
    
  </li>
  
  <li>
    <a href="#searchModal" class="modal-trigger waves-effect waves-light">
      <i id="searchIcon" class="fas fa-search" title="搜索" style="zoom: 0.85;"></i>
    </a>
  </li>
</ul>


<div id="mobile-nav" class="side-nav sidenav">

    <div class="mobile-head bg-color">
        
        <img src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/medias/logo.png" class="logo-img circle responsive-img">
        
        <div class="logo-name">不二博客</div>
        <div class="logo-desc">
            
            个人学习储备知识与经验的秘密基地
            
        </div>
    </div>

    

    <ul class="menu-list mobile-menu-list">
        
        <li class="m-nav-item">
	  
		<a href="/" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-home"></i>
			
			首页
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/tags" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-tags"></i>
			
			标签
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/categories" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-bookmark"></i>
			
			分类
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/archives" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-archive"></i>
			
			归档
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/about" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-user-circle"></i>
			
			关于
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/contact" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-comments"></i>
			
			留言板
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/friends" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-address-book"></i>
			
			友情链接
		</a>
          
        </li>
        
        
        <li><div class="divider"></div></li>
        <li>
            <a href="https://github.com/weiyouwozuiku" class="waves-effect waves-light" target="_blank">
                <i class="fab fa-github-square fa-fw"></i>Fork Me
            </a>
        </li>
        
    </ul>
</div>


        </div>

        
            <style>
    .nav-transparent .github-corner {
        display: none !important;
    }

    .github-corner {
        position: absolute;
        z-index: 10;
        top: 0;
        right: 0;
        border: 0;
        transform: scale(1.1);
    }

    .github-corner svg {
        color: #0f9d58;
        fill: #fff;
        height: 64px;
        width: 64px;
    }

    .github-corner:hover .octo-arm {
        animation: a 0.56s ease-in-out;
    }

    .github-corner .octo-arm {
        animation: none;
    }

    @keyframes a {
        0%,
        to {
            transform: rotate(0);
        }
        20%,
        60% {
            transform: rotate(-25deg);
        }
        40%,
        80% {
            transform: rotate(10deg);
        }
    }
</style>

<a href="https://github.com/weiyouwozuiku" class="github-corner tooltipped hide-on-med-and-down" target="_blank"
   data-tooltip="Fork Me" data-position="left" data-delay="50">
    <svg viewBox="0 0 250 250" aria-hidden="true">
        <path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path>
        <path d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2"
              fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path>
        <path d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z"
              fill="currentColor" class="octo-body"></path>
    </svg>
</a>
        
    </nav>

</header>

    
<script src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/cryptojs/crypto-js.min.js"></script>
<script>
    (function() {
        let pwd = '';
        if (pwd && pwd.length > 0) {
            if (pwd !== CryptoJS.SHA256(prompt('请输入访问本文章的密码')).toString(CryptoJS.enc.Hex)) {
                alert('密码错误，将返回主页！');
                location.href = '/';
            }
        }
    })();
</script>




<div class="bg-cover pd-header post-cover" style="background-image: url('https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@src/source/_posts/PageImg/算法/数据处理.jpeg')">
    <div class="container" style="right: 0px;left: 0px;">
        <div class="row">
            <div class="col s12 m12 l12">
                <div class="brand">
                    <h1 class="description center-align post-title">数据预处理</h1>
                </div>
            </div>
        </div>
    </div>
</div>




<main class="post-container content">

    
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/tocbot/tocbot.css">
<style>
    #articleContent h1::before,
    #articleContent h2::before,
    #articleContent h3::before,
    #articleContent h4::before,
    #articleContent h5::before,
    #articleContent h6::before {
        display: block;
        content: " ";
        height: 100px;
        margin-top: -100px;
        visibility: hidden;
    }

    #articleContent :focus {
        outline: none;
    }

    .toc-fixed {
        position: fixed;
        top: 64px;
    }

    .toc-widget {
        width: 345px;
        padding-left: 20px;
    }

    .toc-widget .toc-title {
        padding: 35px 0 15px 17px;
        font-size: 1.5rem;
        font-weight: bold;
        line-height: 1.5rem;
    }

    .toc-widget ol {
        padding: 0;
        list-style: none;
    }

    #toc-content {
        padding-bottom: 30px;
        overflow: auto;
    }

    #toc-content ol {
        padding-left: 10px;
    }

    #toc-content ol li {
        padding-left: 10px;
    }

    #toc-content .toc-link:hover {
        color: #42b983;
        font-weight: 700;
        text-decoration: underline;
    }

    #toc-content .toc-link::before {
        background-color: transparent;
        max-height: 25px;

        position: absolute;
        right: 23.5vw;
        display: block;
    }

    #toc-content .is-active-link {
        color: #42b983;
    }

    #floating-toc-btn {
        position: fixed;
        right: 15px;
        bottom: 76px;
        padding-top: 15px;
        margin-bottom: 0;
        z-index: 998;
    }

    #floating-toc-btn .btn-floating {
        width: 48px;
        height: 48px;
    }

    #floating-toc-btn .btn-floating i {
        line-height: 48px;
        font-size: 1.4rem;
    }
</style>
<div class="row">
    <div id="main-content" class="col s12 m12 l9">
        <!-- 文章内容详情 -->
<div id="artDetail">
    <div class="card">
        <div class="card-content article-info">
            <div class="row tag-cate">
                <div class="col s7">
                    
                    <div class="article-tag">
                        
                            <a href="/tags/%E6%95%B0%E6%8D%AE%E9%A2%84%E5%A4%84%E7%90%86/">
                                <span class="chip bg-color">数据预处理</span>
                            </a>
                        
                    </div>
                    
                </div>
                <div class="col s5 right-align">
                    
                    <div class="post-cate">
                        <i class="fas fa-bookmark fa-fw icon-category"></i>
                        
                            <a href="/categories/%E7%AE%97%E6%B3%95/" class="post-category">
                                算法
                            </a>
                        
                    </div>
                    
                </div>
            </div>

            <div class="post-info">
                
                <div class="post-date info-break-policy">
                    <i class="far fa-calendar-minus fa-fw"></i>发布日期:&nbsp;&nbsp;
                    2019-10-07
                </div>
                

                
                <div class="post-date info-break-policy">
                    <i class="far fa-calendar-check fa-fw"></i>更新日期:&nbsp;&nbsp;
                    2022-01-14
                </div>
                

                
                <div class="info-break-policy">
                    <i class="far fa-file-word fa-fw"></i>文章字数:&nbsp;&nbsp;
                    3.5k
                </div>
                

                
                <div class="info-break-policy">
                    <i class="far fa-clock fa-fw"></i>阅读时长:&nbsp;&nbsp;
                    12 分
                </div>
                

                
                    <div id="busuanzi_container_page_pv" class="info-break-policy">
                        <i class="far fa-eye fa-fw"></i>阅读次数:&nbsp;&nbsp;
                        <span id="busuanzi_value_page_pv"></span>
                    </div>
				
            </div>
        </div>
        <hr class="clearfix">

        
        <!-- 是否加载使用自带的 prismjs. -->
        <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/prism/prism.css">
        

        
        <!-- 代码块折行 -->
        <style type="text/css">
            code[class*="language-"], pre[class*="language-"] { white-space: pre-wrap !important; }
        </style>
        

        <div class="card-content article-card-content">
            <div id="articleContent">
                <p>数据预处理的主要步骤分为：<strong>数据清理、数据集成、数据规约和数据变换</strong>。本文将从这四个方面详细的介绍具体的方法。如果在一个项目中，你在这几个方面的数据处理做的都很不错，对于之后的建模具有极大的帮助，并且能快速达到一个还不错的结果。</p>
<h2 id="数据清理">数据清理<a class="anchor" href="#数据清理">¶</a></h2>
<p>数据清理(data cleaning) 的主要思想是通过填补缺失值、光滑噪声数据，平滑或删除离群点，并解决数据的不一致性来“清理“数据。如果用户认为数据时脏乱的，他们不太会相信基于这些数据的挖掘结果，即输出的结果是不可靠的。</p>
<h3 id="1、缺失值的处理">1、缺失值的处理<a class="anchor" href="#1、缺失值的处理">¶</a></h3>
<p>由于现实世界中，获取信息和数据的过程中，会存在各类的原因导致数据丢失和空缺。针对这些缺失值的处理方法，主要是基于变量的分布特性和变量的重要性（信息量和预测能力）采用不同的方法。主要分为以下几种：</p>
<ul>
<li>
<p>删除变量：若变量的缺失率较高（大于80%），覆盖率较低，且重要性较低，可以直接将变量删除。</p>
</li>
<li>
<p>定值填充：工程中常见用-9999进行替代</p>
</li>
<li>
<p>统计量填充：若缺失率较低（小于95%）且重要性较低，则根据数据分布的情况进行填充。对于数据符合均匀分布，用该变量的均值填补缺失，对于数据存在倾斜分布的情况，采用中位数进行填补。</p>
</li>
<li>
<p>插值法填充：包括随机插值，多重差补法，热平台插补，拉格朗日插值，牛顿插值等</p>
</li>
<li>
<p>模型填充：使用回归、贝叶斯、随机森林、决策树等模型对缺失数据进行预测。</p>
</li>
<li>
<p>哑变量填充：若变量是离散型，且不同值较少，可转换成哑变量，例如性别SEX变量，存在<em>male,fameal,NA</em>三个不同的值，可将该列转换成 <em>IS_SEX_MALE, IS_SEX_FEMALE, IS_SEX_NA</em>。若某个变量存在十几个不同的值，可根据每个值的频数，将频数较小的值归为一类’other’，降低维度。此做法可最大化保留变量的信息。</p>
</li>
</ul>
<p>总结来看，常用的做法是：先用<code>pandas.isnull.sum()</code>检测出变量的缺失比例，考虑删除或者填充，若需要填充的变量是连续型，一般采用均值法和随机差值进行填充，若变量是离散型，通常采用中位数或哑变量进行填充。</p>
<p><strong>注意：若对变量进行分箱离散化，一般会将缺失值单独作为一个箱子（离散变量的一个值）</strong></p>
<h3 id="2、离群点处理">2、离群点处理<a class="anchor" href="#2、离群点处理">¶</a></h3>
<p>异常值是数据分布的常态，处于特定分布区域或范围之外的数据通常被定义为异常或噪声。异常分为两种：“伪异常”，由于特定的业务运营动作产生，是正常反应业务的状态，而不是数据本身的异常；“真异常”，不是由于特定的业务运营动作产生，而是数据本身分布异常，即离群点。主要有以下检测离群点的方法：</p>
<ul>
<li>简单统计分析：根据箱线图、各分位点判断是否存在异常，例如pandas的describe函数可以快速发现异常值。</li>
<li><img src="https://math.now.sh?inline=3%5Csigma" style="filter: opacity(95%);transform:scale(0.85);text-align:center;display:inline-block;margin: 0;">原则：若数据存在正态分布，偏离均值的<img src="https://math.now.sh?inline=3%5Csigma" style="filter: opacity(95%);transform:scale(0.85);text-align:center;display:inline-block;margin: 0;">之外. 通常定义<img src="https://math.now.sh?inline=P%28%7Cx-%5Cmu%7C%3E3%5Csigma%29%5Cleq%3D0.003" style="filter: opacity(95%);transform:scale(0.85);text-align:center;display:inline-block;margin: 0;">范围内的点为离群点。</li>
<li>基于绝对离差中位数（MAD）：这是一种稳健对抗离群数据的距离值方法，采用计算各观测值与平均值的距离总和的方法。放大了离群值的影响。</li>
<li>基于距离：通过定义对象之间的临近性度量，根据距离判断异常对象是否远离其他对象，缺点是计算复杂度较高，不适用于大数据集和存在不同密度区域的数据集</li>
<li>基于密度：离群点的局部密度显著低于大部分近邻点，适用于非均匀的数据集</li>
<li>基于聚类：利用聚类算法，丢弃远离其他簇的小簇。</li>
</ul>
<p>总结来看，在数据处理阶段将离群点作为影响数据质量的异常点考虑，而不是作为通常所说的异常检测目标点，因而楼主一般采用较为简单直观的方法，结合箱线图和MAD的统计方法判断变量的离群点。</p>
<p>具体的处理手段：</p>
<ul>
<li>根据异常点的数量和影响，考虑是否将该条记录删除，信息损失多</li>
<li>若对数据做了log-scale 对数变换后消除了异常值，则此方法生效，且不损失信息</li>
<li>平均值或中位数替代异常点，简单高效，信息的损失较少</li>
<li>在训练树模型时，树模型对离群点的鲁棒性较高，无信息损失，不影响模型训练效果</li>
</ul>
<h3 id="3、噪声处理">3、噪声处理<a class="anchor" href="#3、噪声处理">¶</a></h3>
<p>噪声是变量的随机误差和方差，是观测点和真实点之间的误差，即<img src="https://math.now.sh?inline=obs%3Dx%2B%5Cepsilon" style="filter: opacity(95%);transform:scale(0.85);text-align:center;display:inline-block;margin: 0;">。通常的处理办法：对数据进行分箱操作，等频或等宽分箱，然后用每个箱的平均数，中位数或者边界值（不同数据分布，处理方法不同）代替箱中所有的数，起到平滑数据的作用。另外一种做法是，建立该变量和预测变量的回归模型，根据回归系数和预测变量，反解出自变量的近似值。</p>
<h2 id="数据集成">数据集成<a class="anchor" href="#数据集成">¶</a></h2>
<p>数据分析任务多半涉及数据集成。数据集成将多个数据源中的数据结合成、存放在一个一致的数据存储，如数据仓库中。这些源可能包括多个数据库、数据方或一般文件。</p>
<h3 id="1-实体识别问题">1. 实体识别问题<a class="anchor" href="#1-实体识别问题">¶</a></h3>
<p>例如，数据分析者或计算机如何才能确信一个数 据库中的<em>customer_id</em>和另一个数据库中的 <em>cust_number</em>指的是同一实体?通常，数据库和数据仓库 有元数据——关于数据的数据。这种元数据可以帮助避免模式集成中的错误。</p>
<h3 id="2-冗余问题">2. 冗余问题<a class="anchor" href="#2-冗余问题">¶</a></h3>
<p>一个属性是冗余的，如果它能由另一个表“导出”;如年薪。属性或 维命名的不一致也可能导致数据集中的冗余。 用相关性检测冗余：数值型变量可计算相关系数矩阵，标称型变量可计算卡方检验。</p>
<h3 id="3-数据值的冲突和处理">3. 数据值的冲突和处理<a class="anchor" href="#3-数据值的冲突和处理">¶</a></h3>
<p>不同数据源，在统一合并时，保持规范化，去重。</p>
<h2 id="数据规约">数据规约<a class="anchor" href="#数据规约">¶</a></h2>
<p>数据归约技术可以用来得到数据集的归约表示，它小得多，但仍接近地保持原数据的完整性。 这样，在归约后的数据集上挖掘将更有效，并产生相同(或几乎相同)的分析结果。一般有如下策略：</p>
<h3 id="1、维度规约">1、维度规约<a class="anchor" href="#1、维度规约">¶</a></h3>
<p>用于数据分析的数据可能包含数以百计的属性，其中大部分属性与挖掘任务不相关，是冗余的。维度归约通过删除不相关的属性，来减少数据量，并保证信息的损失最小。</p>
<p><strong>属性子集选择</strong>：目标是找出最小属性集，使得数据类的概率分布尽可能地接近使用所有属性的原分布。在压缩 的属性集上挖掘还有其它的优点。它减少了出现在发现模式上的属性的数目，使得模式更易于理解。</p>
<ul>
<li>逐步向前选择：该过程由空属性集开始，选择原属性集中最好的属性，并将它添加到该集合中。在其后的每一次迭代，将原属性集剩下的属性中的最好的属性添加到该集合中。</li>
<li>逐步向后删除：该过程由整个属性集开始。在每一步，删除掉尚在属性集中的最坏属性。</li>
<li>向前选择和向后删除的结合：向前选择和向后删除方法可以结合在一起，每一步选择一个最 好的属性，并在剩余属性中删除一个最坏的属性。</li>
</ul>
<p><code>python scikit-learn </code>中的递归特征消除算法<code>Recursive feature elimination (RFE)</code>，就是利用这样的思想进行特征子集筛选的，一般考虑建立<code>SVM</code>或回归模型。</p>
<p><strong>单变量重要性</strong>：分析单变量和目标变量的相关性，删除预测能力较低的变量。这种方法不同于属性子集选择，通常从统计学和信息的角度去分析。</p>
<ul>
<li>pearson相关系数和卡方检验，分析目标变量和单变量的相关性。</li>
<li>回归系数：训练线性回归或逻辑回归，提取每个变量的表决系数，进行重要性排序。</li>
<li>树模型的Gini指数：训练决策树模型，提取每个变量的重要度，即Gini指数进行排序。</li>
<li>Lasso正则化：训练回归模型时，加入L1正则化参数，将特征向量稀疏化。</li>
<li>IV指标：风控模型中，通常求解每个变量的IV值，来定义变量的重要度，一般将阀值设定在0.02以上。</li>
</ul>
<p>以上提到的方法，没有讲解具体的理论知识和实现方法，需要同学们自己去熟悉掌握。楼主通常的做法是根据业务需求来定，如果基于业务的用户或商品特征，需要较多的解释性，考虑采用统计上的一些方法，如变量的分布曲线，直方图等，再计算相关性指标，最后去考虑一些模型方法。如果建模需要，则通常采用模型方法去筛选特征，如果用一些更为复杂的GBDT，DNN等模型，建议不做特征选择，而做特征交叉。</p>
<h3 id="2、维度变换">2、维度变换<a class="anchor" href="#2、维度变换">¶</a></h3>
<p>维度变换是将现有数据降低到更小的维度，尽量保证数据信息的完整性。楼主将介绍常用的几种有损失的维度变换方法，将大大地提高实践中建模的效率</p>
<ul>
<li>主成分分析（PCA）和因子分析（FA）：PCA通过空间映射的方式，将当前维度映射到更低的维度，使得每个变量在新空间的方差最大。FA则是找到当前特征向量的公因子（维度更小），用公因子的线性组合来描述当前的特征向量。</li>
<li>奇异值分解（SVD）：SVD的降维可解释性较低，且计算量比PCA大，一般用在稀疏矩阵上降维，例如图片压缩，推荐系统。</li>
<li>聚类：将某一类具有相似性的特征聚到单个变量，从而大大降低维度。</li>
<li>线性组合：将多个变量做线性回归，根据每个变量的表决系数，赋予变量权重，可将该类变量根据权重组合成一个变量。</li>
<li>流行学习：流行学习中一些复杂的非线性方法，可参考skearn：<a href="https://link.zhihu.com/?target=https%3A//scikit-learn.org/stable/auto_examples/manifold/plot_lle_digits.html">LLE Example</a></li>
</ul>
<h2 id="数据变换">数据变换<a class="anchor" href="#数据变换">¶</a></h2>
<p>数据变换包括对数据进行规范化，离散化，稀疏化处理，达到适用于挖掘的目的。</p>
<h3 id="1、规范化处理">1、规范化处理<a class="anchor" href="#1、规范化处理">¶</a></h3>
<p>数据中不同特征的量纲可能不一致，数值间的差别可能很大，不进行处理可能会影响到数据分析的结果，因此，需要对数据按照一定比例进行缩放，使之落在一个特定的区域，便于进行综合分析。特别是基于距离的挖掘方法，聚类，KNN，SVM一定要做规范化处理。</p>
<ul>
<li>最大 - 最小规范化：将数据映射到[0,1]区间，<img src="https://math.now.sh?inline=x_%7Bnew%7D%3D%5Cfrac%7Bx-x_%7Bmin%7D%7D%7Bx_%7Bmax%7D-x_%7Bmin%7D%7D" style="filter: opacity(95%);transform:scale(0.85);text-align:center;display:inline-block;margin: 0;"></li>
<li>Z-Score标准化：处理后的数据均值为0，方差为1，<img src="https://math.now.sh?inline=x_%7Bnew%7D%3D%5Cfrac%7Bx-%5Coverline%7Bx%7D%7D%7B%5Csigma%7D" style="filter: opacity(95%);transform:scale(0.85);text-align:center;display:inline-block;margin: 0;"></li>
<li>Log变换：在时间序列数据中，对于数据量级相差较大的变量，通常做Log函数的变换，<img src="https://math.now.sh?inline=x_%7Bnew%7D%3Dlogx" style="filter: opacity(95%);transform:scale(0.85);text-align:center;display:inline-block;margin: 0;"></li>
</ul>
<h3 id="2、离散化处理">2、离散化处理<a class="anchor" href="#2、离散化处理">¶</a></h3>
<p>数据离散化是指将连续的数据进行分段，使其变为一段段离散化的区间。分段的原则有基于等距离、等频率或优化的方法。数据离散化的原因主要有以下几点：</p>
<ul>
<li>模型需要：比如决策树、朴素贝叶斯等算法，都是基于离散型的数据展开的。如果要使用该类算法，必须将离散型的数据进行。有效的离散化能减小算法的时间和空间开销，提高系统对样本的分类聚类能力和抗噪声能力。</li>
<li>离散化的特征相对于连续型特征更易理解。</li>
<li>可以有效的克服数据中隐藏的缺陷，使模型结果更加稳定。等频法：使得每个箱中的样本数量相等，例如总样本n=100，分成k=5个箱，则分箱原则是保证落入每个箱的样本量=20。</li>
</ul>
<p>等宽法：使得属性的箱宽度相等，例如年龄变量（0-100之间），可分成 [0,20]，[20,40]，[40,60]，[60,80]，[80,100]五个等宽的箱。</p>
<p>聚类法：根据聚类出来的簇，每个簇中的数据为一个箱，簇的数量模型给定。</p>
<h3 id="3、稀疏化处理：">3、稀疏化处理：<a class="anchor" href="#3、稀疏化处理：">¶</a></h3>
<p>针对离散型且标称变量，无法进行有序的LabelEncoder时，通常考虑将变量做0，1哑变量的稀疏化处理，例如动物类型变量中含有猫，狗，猪，羊四个不同值，将该变量转换成is_猪，is_猫，is_狗，is_羊四个哑变量。若是变量的不同值较多，则根据频数，将出现次数较少的值统一归为一类’rare’。稀疏化处理既有利于模型快速收敛，又能提升模型的抗噪能力。</p>

                
            </div>
            <hr/>

            

    <div class="reprint" id="reprint-statement">
        
            <div class="reprint__author">
                <span class="reprint-meta" style="font-weight: bold;">
                    <i class="fas fa-user">
                        文章作者:
                    </i>
                </span>
                <span class="reprint-info">
                    <a href="/about" rel="external nofollow noreferrer">不二</a>
                </span>
            </div>
            <div class="reprint__type">
                <span class="reprint-meta" style="font-weight: bold;">
                    <i class="fas fa-link">
                        文章链接:
                    </i>
                </span>
                <span class="reprint-info">
                    <a href="http://buerlog.top/2019/10/07/suan-fa/shu-ju-yu-chu-li/">http://buerlog.top/2019/10/07/suan-fa/shu-ju-yu-chu-li/</a>
                </span>
            </div>
            <div class="reprint__notice">
                <span class="reprint-meta" style="font-weight: bold;">
                    <i class="fas fa-copyright">
                        版权声明:
                    </i>
                </span>
                <span class="reprint-info">
                    本博客所有文章除特別声明外，均采用
                    <a href="https://creativecommons.org/licenses/by/4.0/deed.zh" rel="external nofollow noreferrer" target="_blank">CC BY 4.0</a>
                    许可协议。转载请注明来源
                    <a href="/about" target="_blank">不二</a>
                    !
                </span>
            </div>
        
    </div>

    <script async defer>
      document.addEventListener("copy", function (e) {
        let toastHTML = '<span>复制成功，请遵循本文的转载规则</span><button class="btn-flat toast-action" onclick="navToReprintStatement()" style="font-size: smaller">查看</a>';
        M.toast({html: toastHTML})
      });

      function navToReprintStatement() {
        $("html, body").animate({scrollTop: $("#reprint-statement").offset().top - 80}, 800);
      }
    </script>



            <div class="tag_share" style="display: block;">
                <div class="post-meta__tag-list" style="display: inline-block;">
                    
                        <div class="article-tag">
                            
                                <a href="/tags/%E6%95%B0%E6%8D%AE%E9%A2%84%E5%A4%84%E7%90%86/">
                                    <span class="chip bg-color">数据预处理</span>
                                </a>
                            
                        </div>
                    
                </div>
                <div class="post_share" style="zoom: 80%; width: fit-content; display: inline-block; float: right; margin: -0.15rem 0;">
                    <link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/share/css/share.min.css">
<div id="article-share">

    
    <div class="social-share" data-sites="twitter,facebook,google,qq,qzone,wechat,weibo,douban,linkedin" data-wechat-qrcode-helper="<p>微信扫一扫即可分享！</p>"></div>
    <script src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/share/js/social-share.min.js"></script>
    

    

</div>

                </div>
            </div>
            
                <style>
    #reward {
        margin: 40px 0;
        text-align: center;
    }

    #reward .reward-link {
        font-size: 1.4rem;
        line-height: 38px;
    }

    #reward .btn-floating:hover {
        box-shadow: 0 6px 12px rgba(0, 0, 0, 0.2), 0 5px 15px rgba(0, 0, 0, 0.2);
    }

    #rewardModal {
        width: 320px;
        height: 350px;
    }

    #rewardModal .reward-title {
        margin: 15px auto;
        padding-bottom: 5px;
    }

    #rewardModal .modal-content {
        padding: 10px;
    }

    #rewardModal .close {
        position: absolute;
        right: 15px;
        top: 15px;
        color: rgba(0, 0, 0, 0.5);
        font-size: 1.3rem;
        line-height: 20px;
        cursor: pointer;
    }

    #rewardModal .close:hover {
        color: #ef5350;
        transform: scale(1.3);
        -moz-transform:scale(1.3);
        -webkit-transform:scale(1.3);
        -o-transform:scale(1.3);
    }

    #rewardModal .reward-tabs {
        margin: 0 auto;
        width: 210px;
    }

    .reward-tabs .tabs {
        height: 38px;
        margin: 10px auto;
        padding-left: 0;
    }

    .reward-content ul {
        padding-left: 0 !important;
    }

    .reward-tabs .tabs .tab {
        height: 38px;
        line-height: 38px;
    }

    .reward-tabs .tab a {
        color: #fff;
        background-color: #ccc;
    }

    .reward-tabs .tab a:hover {
        background-color: #ccc;
        color: #fff;
    }

    .reward-tabs .wechat-tab .active {
        color: #fff !important;
        background-color: #22AB38 !important;
    }

    .reward-tabs .alipay-tab .active {
        color: #fff !important;
        background-color: #019FE8 !important;
    }

    .reward-tabs .reward-img {
        width: 210px;
        height: 210px;
    }
</style>

<div id="reward">
    <a href="#rewardModal" class="reward-link modal-trigger btn-floating btn-medium waves-effect waves-light red">赏</a>

    <!-- Modal Structure -->
    <div id="rewardModal" class="modal">
        <div class="modal-content">
            <a class="close modal-close"><i class="fas fa-times"></i></a>
            <h4 class="reward-title">你的赏识是我前进的动力</h4>
            <div class="reward-content">
                <div class="reward-tabs">
                    <ul class="tabs row">
                        <li class="tab col s6 alipay-tab waves-effect waves-light"><a href="#alipay">支付宝</a></li>
                        <li class="tab col s6 wechat-tab waves-effect waves-light"><a href="#wechat">微 信</a></li>
                    </ul>
                    <div id="alipay">
                        <img src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/medias/reward/alipay.jpg" class="reward-img" alt="支付宝打赏二维码">
                    </div>
                    <div id="wechat">
                        <img src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/medias/reward/wechat.png" class="reward-img" alt="微信打赏二维码">
                    </div>
                </div>
            </div>
        </div>
    </div>
</div>

<script>
    $(function () {
        $('.tabs').tabs();
    });
</script>

            
        </div>
    </div>

    

    

    

    

    

    

    

    

    

<article id="prenext-posts" class="prev-next articles">
    <div class="row article-row">
        
        <div class="article col s12 m6" data-aos="fade-up">
            <div class="article-badge left-badge text-color">
                <i class="fas fa-chevron-left"></i>&nbsp;上一篇</div>
            <div class="card">
                <a href="/2019/12/01/qian-ru-shi/sg90-duo-ji-kong-zhi/">
                    <div class="card-image">
                        
                        <img src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@src/source/_posts/PageImg/嵌入式/pcb.jpg" class="responsive-img" alt="sg90舵机控制">
                        
                        <span class="card-title">sg90舵机控制</span>
                    </div>
                </a>
                <div class="card-content article-content">
                    <div class="summary block-with-text">
                        
                            
                        
                    </div>
                    <div class="publish-info">
                        <span class="publish-date">
                            <i class="far fa-clock fa-fw icon-date"></i>2019-12-01
                        </span>
                        <span class="publish-author">
                            
                            <i class="fas fa-bookmark fa-fw icon-category"></i>
                            
                            <a href="/categories/%E5%B5%8C%E5%85%A5%E5%BC%8F/" class="post-category">
                                    嵌入式
                                </a>
                            
                            
                        </span>
                    </div>
                </div>
                
                <div class="card-action article-tags">
                    
                    <a href="/tags/%E6%A0%91%E8%8E%93%E6%B4%BE/">
                        <span class="chip bg-color">树莓派</span>
                    </a>
                    
                    <a href="/tags/Arduino/">
                        <span class="chip bg-color">Arduino</span>
                    </a>
                    
                </div>
                
            </div>
        </div>
        
        
        <div class="article col s12 m6" data-aos="fade-up">
            <div class="article-badge right-badge text-color">
                下一篇&nbsp;<i class="fas fa-chevron-right"></i>
            </div>
            <div class="card">
                <a href="/2019/09/23/suan-fa/gui-yi-hua-suan-fa/">
                    <div class="card-image">
                        
                        <img src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@src/source/_posts/PageImg/算法/归一化算法.jpg" class="responsive-img" alt="归一化算法">
                        
                        <span class="card-title">归一化算法</span>
                    </div>
                </a>
                <div class="card-content article-content">
                    <div class="summary block-with-text">
                        
                            
                        
                    </div>
                    <div class="publish-info">
                            <span class="publish-date">
                                <i class="far fa-clock fa-fw icon-date"></i>2019-09-23
                            </span>
                        <span class="publish-author">
                            
                            <i class="fas fa-bookmark fa-fw icon-category"></i>
                            
                            <a href="/categories/%E7%AE%97%E6%B3%95/" class="post-category">
                                    算法
                                </a>
                            
                            
                        </span>
                    </div>
                </div>
                
                <div class="card-action article-tags">
                    
                    <a href="/tags/%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90/">
                        <span class="chip bg-color">数据分析</span>
                    </a>
                    
                </div>
                
            </div>
        </div>
        
    </div>
</article>

</div>


<script>
    $('#articleContent').on('copy', function (e) {
        // IE8 or earlier browser is 'undefined'
        if (typeof window.getSelection === 'undefined') return;

        var selection = window.getSelection();
        // if the selection is short let's not annoy our users.
        if (('' + selection).length < Number.parseInt('120')) {
            return;
        }

        // create a div outside of the visible area and fill it with the selected text.
        var bodyElement = document.getElementsByTagName('body')[0];
        var newdiv = document.createElement('div');
        newdiv.style.position = 'absolute';
        newdiv.style.left = '-99999px';
        bodyElement.appendChild(newdiv);
        newdiv.appendChild(selection.getRangeAt(0).cloneContents());

        // we need a <pre> tag workaround.
        // otherwise the text inside "pre" loses all the line breaks!
        if (selection.getRangeAt(0).commonAncestorContainer.nodeName === 'PRE') {
            newdiv.innerHTML = "<pre>" + newdiv.innerHTML + "</pre>";
        }

        var url = document.location.href;
        newdiv.innerHTML += '<br />'
            + '来源: 不二博客<br />'
            + '文章作者: 不二<br />'
            + '文章链接: <a href="' + url + '">' + url + '</a><br />'
            + '本文章著作权归作者所有，任何形式的转载都请注明出处。';

        selection.selectAllChildren(newdiv);
        window.setTimeout(function () {bodyElement.removeChild(newdiv);}, 200);
    });
</script>


<!-- 代码块功能依赖 -->
<script type="text/javascript" src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/codeBlock/codeBlockFuction.js"></script>

<!-- 代码语言 -->

<script type="text/javascript" src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/codeBlock/codeLang.js"></script>


<!-- 代码块复制 -->

<script type="text/javascript" src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/codeBlock/codeCopy.js"></script>


<!-- 代码块收缩 -->

<script type="text/javascript" src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/codeBlock/codeShrink.js"></script>


    </div>
    <div id="toc-aside" class="expanded col l3 hide-on-med-and-down">
        <div class="toc-widget card" style="background-color: white;">
            <div class="toc-title"><i class="far fa-list-alt"></i>&nbsp;&nbsp;目录</div>
            <div id="toc-content"></div>
        </div>
    </div>
</div>

<!-- TOC 悬浮按钮. -->

<div id="floating-toc-btn" class="hide-on-med-and-down">
    <a class="btn-floating btn-large bg-color">
        <i class="fas fa-list-ul"></i>
    </a>
</div>


<script src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/tocbot/tocbot.min.js"></script>
<script>
    $(function () {
        tocbot.init({
            tocSelector: '#toc-content',
            contentSelector: '#articleContent',
            headingsOffset: -($(window).height() * 0.4 - 45),
            collapseDepth: Number('0'),
            headingSelector: 'h2, h3, h4, h5, h6'
        });

        // modify the toc link href to support Chinese.
        let i = 0;
        let tocHeading = 'toc-heading-';
        $('#toc-content a').each(function () {
            $(this).attr('href', '#' + tocHeading + (++i));
        });

        // modify the heading title id to support Chinese.
        i = 0;
        $('#articleContent').children('h2, h3, h4, h5, h6').each(function () {
            $(this).attr('id', tocHeading + (++i));
        });

        // Set scroll toc fixed.
        let tocHeight = parseInt($(window).height() * 0.4 - 64);
        let $tocWidget = $('.toc-widget');
        $(window).scroll(function () {
            let scroll = $(window).scrollTop();
            /* add post toc fixed. */
            if (scroll > tocHeight) {
                $tocWidget.addClass('toc-fixed');
            } else {
                $tocWidget.removeClass('toc-fixed');
            }
        });

        
        /* 修复文章卡片 div 的宽度. */
        let fixPostCardWidth = function (srcId, targetId) {
            let srcDiv = $('#' + srcId);
            if (srcDiv.length === 0) {
                return;
            }

            let w = srcDiv.width();
            if (w >= 450) {
                w = w + 21;
            } else if (w >= 350 && w < 450) {
                w = w + 18;
            } else if (w >= 300 && w < 350) {
                w = w + 16;
            } else {
                w = w + 14;
            }
            $('#' + targetId).width(w);
        };

        // 切换TOC目录展开收缩的相关操作.
        const expandedClass = 'expanded';
        let $tocAside = $('#toc-aside');
        let $mainContent = $('#main-content');
        $('#floating-toc-btn .btn-floating').click(function () {
            if ($tocAside.hasClass(expandedClass)) {
                $tocAside.removeClass(expandedClass).hide();
                $mainContent.removeClass('l9');
            } else {
                $tocAside.addClass(expandedClass).show();
                $mainContent.addClass('l9');
            }
            fixPostCardWidth('artDetail', 'prenext-posts');
        });
        
    });
</script>

    

</main>




    <footer class="page-footer bg-color">
    
        <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/aplayer/APlayer.min.css">
<style>
    .aplayer .aplayer-lrc p {
        
        display: none;
        
        font-size: 12px;
        font-weight: 700;
        line-height: 16px !important;
    }

    .aplayer .aplayer-lrc p.aplayer-lrc-current {
        
        display: none;
        
        font-size: 15px;
        color: #42b983;
    }

    
    .aplayer.aplayer-fixed.aplayer-narrow .aplayer-body {
        left: -66px !important;
    }

    .aplayer.aplayer-fixed.aplayer-narrow .aplayer-body:hover {
        left: 0px !important;
    }

    
</style>
<div class="">
    
    <div class="row">
        <meting-js class="col l8 offset-l2 m10 offset-m1 s12"
                   server="netease"
                   type="song"
                   id="1901371647"
                   fixed='true'
                   autoplay='false'
                   theme='#42b983'
                   loop='all'
                   order='random'
                   preload='auto'
                   volume='0.7'
                   list-folded='true'
        >
        </meting-js>
    </div>
</div>

<script src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/aplayer/APlayer.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/meting@2/dist/Meting.min.js"></script>

    
    <div class="container row center-align" style="margin-bottom: 15px !important;">
        <div class="col s12 m8 l8 copy-right">
            Copyright&nbsp;&copy;
            
                <span id="year">2018-2023</span>
            
            <span id="year">2018</span>
            <a href="/about" target="_blank">不二</a>
            |&nbsp;Powered by&nbsp;<a href="https://hexo.io/" target="_blank">Hexo</a>
            |&nbsp;Theme&nbsp;<a href="https://github.com/blinkfox/hexo-theme-matery" target="_blank">Matery</a>
            <br>
            
            &nbsp;<i class="fas fa-chart-area"></i>&nbsp;站点总字数:&nbsp;<span
                class="white-color">263.4k</span>&nbsp;字
            
            
            
            
            
            
            <span id="busuanzi_container_site_pv">
                |&nbsp;<i class="far fa-eye"></i>&nbsp;总访问量:&nbsp;<span id="busuanzi_value_site_pv"
                    class="white-color"></span>&nbsp;次
            </span>
            
            
            <span id="busuanzi_container_site_uv">
                |&nbsp;<i class="fas fa-users"></i>&nbsp;总访问人数:&nbsp;<span id="busuanzi_value_site_uv"
                    class="white-color"></span>&nbsp;人
            </span>
            
            <br>
            
            <span id="sitetime">载入运行时间...</span>
            <script>
                function siteTime() {
                    var seconds = 1000;
                    var minutes = seconds * 60;
                    var hours = minutes * 60;
                    var days = hours * 24;
                    var years = days * 365;
                    var today = new Date();
                    var startYear = "2018";
                    var startMonth = "5";
                    var startDate = "25";
                    var startHour = "17";
                    var startMinute = "20";
                    var startSecond = "53";
                    var todayYear = today.getFullYear();
                    var todayMonth = today.getMonth() + 1;
                    var todayDate = today.getDate();
                    var todayHour = today.getHours();
                    var todayMinute = today.getMinutes();
                    var todaySecond = today.getSeconds();
                    var t1 = Date.UTC(startYear, startMonth, startDate, startHour, startMinute, startSecond);
                    var t2 = Date.UTC(todayYear, todayMonth, todayDate, todayHour, todayMinute, todaySecond);
                    var diff = t2 - t1;
                    var diffYears = Math.floor(diff / years);
                    var diffDays = Math.floor((diff / days) - diffYears * 365);
                    var diffHours = Math.floor((diff - (diffYears * 365 + diffDays) * days) / hours);
                    var diffMinutes = Math.floor((diff - (diffYears * 365 + diffDays) * days - diffHours * hours) /
                        minutes);
                    var diffSeconds = Math.floor((diff - (diffYears * 365 + diffDays) * days - diffHours * hours -
                        diffMinutes * minutes) / seconds);
                    if (startYear == todayYear) {
                        document.getElementById("year").innerHTML = todayYear;
                        document.getElementById("sitetime").innerHTML = "本站已安全运行 " + diffDays + " 天 " + diffHours +
                            " 小时 " + diffMinutes + " 分钟 " + diffSeconds + " 秒";
                    } else {
                        document.getElementById("year").innerHTML = startYear + " - " + todayYear;
                        document.getElementById("sitetime").innerHTML = "本站已安全运行 " + diffYears + " 年 " + diffDays +
                            " 天 " + diffHours + " 小时 " + diffMinutes + " 分钟 " + diffSeconds + " 秒";
                    }
                }
                setInterval(siteTime, 1000);
            </script>
            
            <br>
            
        </div>
        <div class="col s12 m4 l4 social-link social-statis">
    <a href="https://github.com/weiyouwozuiku" class="tooltipped" target="_blank" data-tooltip="访问我的GitHub" data-position="top" data-delay="50">
        <i class="fab fa-github"></i>
    </a>



    <a href="mailto:weiyouwozuiku@gmail.com" class="tooltipped" target="_blank" data-tooltip="邮件联系我" data-position="top" data-delay="50">
        <i class="fas fa-envelope-open"></i>
    </a>







    <a href="tencent://AddContact/?fromId=50&fromSubId=1&subcmd=all&uin=1104891151" class="tooltipped" target="_blank" data-tooltip="QQ联系我: 1104891151" data-position="top" data-delay="50">
        <i class="fab fa-qq"></i>
    </a>





    <a href="https://www.zhihu.com/people/he-he-9-35-47" class="tooltipped" target="_blank" data-tooltip="关注我的知乎: https://www.zhihu.com/people/he-he-9-35-47" data-position="top" data-delay="50">
        <i class="fab fa-zhihu1">知</i>
    </a>



    <a href="/atom.xml" class="tooltipped" target="_blank" data-tooltip="RSS 订阅" data-position="top" data-delay="50">
        <i class="fas fa-rss"></i>
    </a>

</div>
    </div>
</footer>

<div class="progress-bar"></div>

    <script src='https://unpkg.com/mermaid@8.14.0/dist/mermaid.min.js'></script>
    <script>
      if (window.mermaid) {
        mermaid.initialize({theme: 'forest'});
      }
    </script>
  

    <!-- 搜索遮罩框 -->
<div id="searchModal" class="modal">
    <div class="modal-content">
        <div class="search-header">
            <span class="title"><i class="fas fa-search"></i>&nbsp;&nbsp;搜索</span>
            <input type="search" id="searchInput" name="s" placeholder="请输入搜索的关键字"
                   class="search-input">
        </div>
        <div id="searchResult"></div>
    </div>
</div>

<script type="text/javascript">
$(function () {
    var searchFunc = function (path, search_id, content_id) {
        'use strict';
        $.ajax({
            url: path,
            dataType: "xml",
            success: function (xmlResponse) {
                // get the contents from search data
                var datas = $("entry", xmlResponse).map(function () {
                    return {
                        title: $("title", this).text(),
                        content: $("content", this).text(),
                        url: $("url", this).text()
                    };
                }).get();
                var $input = document.getElementById(search_id);
                var $resultContent = document.getElementById(content_id);
                $input.addEventListener('input', function () {
                    var str = '<ul class=\"search-result-list\">';
                    var keywords = this.value.trim().toLowerCase().split(/[\s\-]+/);
                    $resultContent.innerHTML = "";
                    if (this.value.trim().length <= 0) {
                        return;
                    }
                    // perform local searching
                    datas.forEach(function (data) {
                        var isMatch = true;
                        var data_title = data.title.trim().toLowerCase();
                        var data_content = data.content.trim().replace(/<[^>]+>/g, "").toLowerCase();
                        var data_url = data.url;
                        data_url = data_url.indexOf('/') === 0 ? data.url : '/' + data_url;
                        var index_title = -1;
                        var index_content = -1;
                        var first_occur = -1;
                        // only match artiles with not empty titles and contents
                        if (data_title !== '' && data_content !== '') {
                            keywords.forEach(function (keyword, i) {
                                index_title = data_title.indexOf(keyword);
                                index_content = data_content.indexOf(keyword);
                                if (index_title < 0 && index_content < 0) {
                                    isMatch = false;
                                } else {
                                    if (index_content < 0) {
                                        index_content = 0;
                                    }
                                    if (i === 0) {
                                        first_occur = index_content;
                                    }
                                }
                            });
                        }
                        // show search results
                        if (isMatch) {
                            str += "<li><a href='" + data_url + "' class='search-result-title'>" + data_title + "</a>";
                            var content = data.content.trim().replace(/<[^>]+>/g, "");
                            if (first_occur >= 0) {
                                // cut out 100 characters
                                var start = first_occur - 20;
                                var end = first_occur + 80;
                                if (start < 0) {
                                    start = 0;
                                }
                                if (start === 0) {
                                    end = 100;
                                }
                                if (end > content.length) {
                                    end = content.length;
                                }
                                var match_content = content.substr(start, end);
                                // highlight all keywords
                                keywords.forEach(function (keyword) {
                                    var regS = new RegExp(keyword, "gi");
                                    match_content = match_content.replace(regS, "<em class=\"search-keyword\">" + keyword + "</em>");
                                });

                                str += "<p class=\"search-result\">" + match_content + "...</p>"
                            }
                            str += "</li>";
                        }
                    });
                    str += "</ul>";
                    $resultContent.innerHTML = str;
                });
            }
        });
    };

    searchFunc('/search.xml', 'searchInput', 'searchResult');
});
</script>

    <!-- 回到顶部按钮 -->
<div id="backTop" class="top-scroll">
    <a class="btn-floating btn-large waves-effect waves-light" href="#!">
        <i class="fas fa-arrow-up"></i>
    </a>
</div>


    <script src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/materialize/materialize.min.js"></script>
    <script src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/masonry/masonry.pkgd.min.js"></script>
    <script src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/aos/aos.js"></script>
    <script src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/scrollprogress/scrollProgress.min.js"></script>
    <script src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/lightGallery/js/lightgallery-all.min.js"></script>
    <script src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/js/matery.js"></script>

    <!-- Baidu Analytics -->

<script>
    var _hmt = _hmt || [];
    (function () {
        var hm = document.createElement("script");
        hm.src = "https://hm.baidu.com/hm.js?943a68f473d421f2d98228f74edc63d0";
        var s = document.getElementsByTagName("script")[0];
        s.parentNode.insertBefore(hm, s);
    })();
</script>

    <!-- Baidu Push -->

<script>
    (function () {
        var bp = document.createElement('script');
        var curProtocol = window.location.protocol.split(':')[0];
        if (curProtocol === 'https') {
            bp.src = 'https://zz.bdstatic.com/linksubmit/push.js';
        } else {
            bp.src = 'http://push.zhanzhang.baidu.com/push.js';
        }
        var s = document.getElementsByTagName("script")[0];
        s.parentNode.insertBefore(bp, s);
    })();
</script>

    
    
    <script async src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/others/busuanzi.pure.mini.js"></script>
    

    

    

    <!--腾讯兔小巢-->
    
    
    <script type="text/javascript" color="0,0,255"
        pointColor="0,0,255" opacity='0.7'
        zIndex="-1" count="150"
        src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/background/canvas-nest.js"></script>
    

    
    
    <script type="text/javascript" size="150" alpha='0.6'
        zIndex="-1" src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/background/ribbon-refresh.min.js" async="async"></script>
    

    
    <script type="text/javascript" src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/background/ribbon-dynamic.js" async="async"></script>
    

    
    <script src="https://cdn.jsdelivr.net/gh/weiyouwozuiku/weiyouwozuiku.github.io@master/libs/instantpage/instantpage.js" type="module"></script>
    

</body>

</html>
